* The first positional argument of the do-file is the project root directory.
global root_dir = "`1'"

* Load the project configuration. It presumably defines the directory globals
* used below (log_dir, classification_dir, dataset_dir, commondata_dir);
* confirm in config.do.
include "$root_dir/code/config/config.do"


* Open a log named "dat". The path is quoted so that a log_dir containing
* spaces still works, consistent with the quoted include above. Any error is
* displayed (noi) but does not stop the run (cap).
cap noi log using "${log_dir}/import_classification.log", replace name(dat)

* Positional arguments 2 to 5 are optional overrides. The sentinel string
* ___EMPTY___ stands for an empty argument: it is mapped to the empty string.
* The cleaned values are stored in the globals arg1 to arg4.
forvalues k = 1/4 {
    local pos = `k' + 1
    global arg`k' = cond("``pos''" == "___EMPTY___", "", "``pos''")
}

* A setting is only overwritten when its argument is non-empty, so whatever
* value the global already holds is kept otherwise.
if ("${arg1}" != "") {
    global weight_category "${arg1}"
    di "Weight category: ${weight_category}"
}

if ("${arg2}" != "") {
    global weight_versions "${arg2}"
    di "Weight versions: ${weight_versions}"
}

if ("${arg3}" != "") {
    global weight_window "${arg3}"
    di "Weight window: ${weight_window}"
}

if ("${arg4}" != "") {
    global wtype "${arg4}"
}
* Unlike the other settings, wtype is echoed unconditionally.
di "${wtype}"
capture noi {



***********************************************************************************************************************
* import_classification.do
*
* Imports the csv patent lists generated by the python classification program
* Input: automation and placebo patents lists as csv files
*        Note: these are lists of families and not patents
* Output: patent lists in ${dataset_dir}/patent_list/
***********************************************************************************************************************
* Import each classification list (one CSV per list; the lists are at the
* family level) and keep it in a tempfile whose local macro is named f<list>.
* File names are quoted so that directories containing spaces still work.
foreach xxx in automation95 automation90 automation80 automationnolabor95 automationX95 CNC80 robot80 automat80 CNC90 robot90 automat90 CNC95 robot95 automat95 {
	import delimited using "${classification_dir}/V6/patent_lists/`xxx'.csv", clear
	tempfile f`xxx'
	save "`f`xxx''"
}

* Same import for the IPC-based variants of the 95 and 90 automation lists
* (suffixes ipc4, ipc6XX and ipc4_pairs). Each CSV ends up in a tempfile whose
* local macro is named f<list>_<suffix>. File names are quoted so that
* directories containing spaces still work.
foreach xxx in automation95 automation90 {
	foreach yyy in ipc4 ipc6XX ipc4_pairs {
		import delimited using "${classification_dir}/V6/patent_lists/`xxx'_`yyy'.csv", clear
		tempfile f`xxx'_`yyy'
		save "`f`xxx'_`yyy''"
	}
}

* Build the distinct list of families that appear in the family-level IPC
* code file and keep it in the tempfile hascode. File names are quoted so
* that directories containing spaces still work.
use "${dataset_dir}/patstat_orbis/docdb_family_id_cipc_codes.dta", clear
keep docdb_family_id
duplicates drop
tempfile hascode
save "`hascode'"

* Start from the application-to-family mapping and create, for every list,
* a 0/1 variable telling whether the family of the application is in the list.
* File names are quoted so that paths containing spaces still work, and the
* merge indicator is referred to by its full name _merge instead of relying
* on variable abbreviation.
use appln_id docdb_family_id using "${commondata_dir}/patstat_2018b/family_info.dta", clear
foreach xxx in automation95 automation90 automation80 automationnolabor95 automationX95 CNC80 robot80 automat80 CNC90 robot90 automat90 CNC95 robot95 automat95 {
	* unmatched(master) keeps every application; _merge equal to 3 means the family was found in the list
	mmerge docdb_family_id using "`f`xxx''", unmatched(master)
	gen `xxx' = (_merge == 3)
	drop _merge
}
* Same membership flags for the IPC-based variants of the 95 and 90
* automation lists; the resulting variables are named <list>_<suffix>.
* Tempfile names are quoted and the merge indicator is referred to by its
* full name _merge instead of relying on variable abbreviation.
foreach xxx in automation95 automation90 {
	foreach yyy in ipc4 ipc6XX ipc4_pairs {
		mmerge docdb_family_id using "`f`xxx'_`yyy''", unmatched(master)
		gen `xxx'_`yyy' = (_merge == 3)
		drop _merge
	}
}

* Flag applications whose family appears in the IPC code file (tempfile
* hascode). The tempfile name is quoted and the merge indicator is referred
* to by its full name _merge instead of relying on variable abbreviation.
* NOTE(review): family_has_codes is dropped right below and never used, so
* this merge currently has no effect on the saved lists. Confirm whether it
* and the hascode tempfile can be removed.
mmerge docdb_family_id using "`hascode'", unmatched(master)
gen family_has_codes = _merge == 3
drop _merge


drop family_has_codes

* Save the patent lists.
* For each automation measure, write a file that contains only the appln_id
* of the flagged applications. preserve and restore bring the full data back
* after each save. The output path is quoted so that a dataset_dir containing
* spaces still works.
foreach a in 80 90 95 X95 {
	preserve
	keep if automation`a'
	keep appln_id
	save "${dataset_dir}/patent_list/auto`a'_patents.dta", replace
	restore
}


* Same export for the automationnolabor95 flag, saved under the shorter name
* autonol95. The output path is quoted so that a dataset_dir containing
* spaces still works.
preserve
keep if automationnolabor95
keep appln_id
save "${dataset_dir}/patent_list/autonol95_patents.dta", replace
restore


* Export the IPC-based variants of the 95 and 90 automation flags, one file
* of appln_id per variant. The output path is quoted so that a dataset_dir
* containing spaces still works.
foreach a in 95 90 {
	foreach yyy in ipc4 ipc6XX ipc4_pairs {
		preserve
		keep if automation`a'_`yyy'
		keep appln_id
		save "${dataset_dir}/patent_list/auto`a'_`yyy'_patents.dta", replace
		restore
	}
}

* Export the CNC, robot and automat flags for each of 80, 90 and 95.
* The automat and robot variables are first renamed to autm<p> and robo<p>,
* so the saved files carry these shorter names; CNC keeps its name.
* The output path is quoted so that a dataset_dir containing spaces still works.
foreach p in 80 90 95 {
	ren automat`p' autm`p'
	ren robot`p' robo`p'
	foreach xxx in CNC`p' robo`p' autm`p' {
		preserve
		keep if `xxx'
		keep appln_id
		save "${dataset_dir}/patent_list/`xxx'_patents.dta", replace
		restore
	}
}

* Machinery and non-machinery placebos ("complements")
* Import one CSV per group (phar, auto, chem) and suffix (60, 90, 95) and keep
* each in a tempfile whose local macro is named f<group><suffix>.
* File names are quoted so that directories containing spaces still work.

foreach xxx in phar auto chem {
	foreach q in 60 90 95 {
		import delimited using "${classification_dir}/V6/patent_lists/p`xxx'`q'_complement.csv", clear
		tempfile f`xxx'`q'
		save "`f`xxx'`q''"
	}
}


* Merge in field descriptions and build family-level technology-group flags
* (isphar, ischem, isauto). File names are quoted so that directories
* containing spaces still work, and the merge indicator is referred to by its
* full name _merge instead of relying on variable abbreviation.
use "${dataset_dir}/patstat_orbis/docdb_family_id_cipc_codes.dta", clear
gen cipc4 = substr(cipc6,1,4)
* Temporarily give the 4-character code the name ipc_maingroup_symbol
* (presumably the key name in ipc_techn_field.dta; confirm) and rename it
* back after the merge.
ren cipc4 ipc_maingroup_symbol
* NOTE(review): the key is written as ipc, which looks like an abbreviation
* of ipc_maingroup_symbol; confirm and consider spelling it out.
mmerge ipc using "${commondata_dir}/patstat_2018b/ipc_techn_field.dta", unmatched(none)
ren ipc_maingroup_symbol cipc4
* unmatched(none) already keeps matched rows only; the explicit filter is kept as a safeguard
keep if _merge == 3
drop _merge
* Code-level indicators: excluded 3-character classes and additionally
* included 4-character and 6-character codes.
gen cipc3 = substr(cipc4,1,3)
gen cipc3_excl = inlist(cipc3,"F41","F42")
gen cipc4_add = inlist(cipc4,"B42C","B07C")
gen cipc6_add = inlist(cipc6,"G05B19","B62D65","G05B2219")
* Technology-group indicators based on the techn_field label.
gen tfphar = techn_field == "Pharmaceuticals"
gen tfchem = inlist(techn_field,"Organic fine chemistry","Macromolecular chemistry, polymers")
gen tfauto = inlist(techn_field,"Handling","Machine tools","Textile and paper machines","Other special machines")
* The auto group also takes the added codes and never takes the excluded classes.
replace tfauto = (cipc3_excl == 0) & (tfauto | cipc4_add | cipc6_add)
* A family belongs to a group as soon as one of its codes does.
foreach xxx in phar chem auto {
	bys docdb_family_id : egen is`xxx' = max(tf`xxx')
}
* Collapse to one row per family.
keep docdb_family_id is*
duplicates drop

* Attach the applications (appln_id) of each family; ukeep limits the
* variables taken from the using file to appln_id. The path is quoted so that
* a commondata_dir containing spaces still works, and the merge indicator is
* dropped right away, as after every other merge in this file.
mmerge docdb_family_id using "${commondata_dir}/patstat_2018b/family_info.dta", unmatched(master) ukeep(appln_id)
drop _merge

* Generate the complements: for each group and suffix, p<group><suffix> is 1
* when the family belongs to the group (is<group>) but is not found in the
* corresponding imported list. The tempfile name is quoted and the merge
* indicator is referred to by its full name _merge instead of relying on
* variable abbreviation.
foreach xxx in phar auto chem {
	foreach q in 60 90 95 {
		mmerge docdb_family_id using "`f`xxx'`q''", unmatched(master)
		gen p`xxx'`q' = (_merge != 3) & is`xxx'
		drop _merge
	}
}

* Save one appln_id list per complement flag (pphar, pauto, pchem for each of
* 60, 90 and 95). The output path is quoted so that a dataset_dir containing
* spaces still works.
foreach xxx in phar auto chem {
	foreach q in 60 90 95 {
		preserve
		keep if p`xxx'`q'
		keep appln_id
		save "${dataset_dir}/patent_list/p`xxx'`q'_patents.dta", replace

		restore
	}
}


}
* _rc holds the return code of the capture block that ends just above:
* zero means every command inside it ran without error.
if _rc != 0 {
    display "Execution finished with errors."
}
else {
    display "Execution finished successfully."
}

* Close the log named dat; capture suppresses the error if it is not open.
cap log close dat